dragonFirst, we create a dataset containing element network information joined with other attributes of elements. We specifically only consider elements that are known to form at least five minerals on Earth since 4.33 Ga. The following elements are therefore excluded (in parentheses is given the number of minerals it forms): Dy (1), Er (1), Gd (1), Hf (1), Sm (1), Re (2), Rb (3).
# Prepare if file not present. Otherwise, read in the file.
if (file.exists(data_file)) {
element_networks_info <- read_csv(data_file)
} else {
mineral_threshold <- 5
dragon:::element_info %>%
# Remove those not present **since** 4.33 Ga
filter(element != "Tc") -> elements
purrr::map_df(elements$element, parse_element_network) %>%
left_join(elements) %>%
mutate(n_elements = as.numeric(n_elements),
n_minerals = as.numeric(n_minerals),
n_localities = as.numeric(n_localities)) %>%
filter(n_minerals >= mineral_threshold) -> element_networks_info
# Save to file
write_csv(element_networks_info, data_file)
}Here is the full dataset used for analysis:
datatable(element_networks_info)This section performs several calculations that are included in manuscript text. These calculations use the same data (element_networks_info) used for all other network calculations.
The following calculations ask either how many minerals an element forms, or how many elements it forms minerals with.
# How many minerals does carbon form?
element_networks_info %>%
filter(element == "C") %>%
pull(n_minerals)## [1] 417
# How many minerals does oxygen form?
element_networks_info %>%
filter(element == "O") %>%
pull(n_minerals)## [1] 3892
# How many minerals does hydrogen form?
element_networks_info %>%
filter(element == "H") %>%
pull(n_minerals)## [1] 2708
# How many minerals does hydrogen form?
element_networks_info %>%
filter(element == "N") %>%
pull(n_minerals)## [1] 85
# With how many elements, not including carbon itself, does carbon form minerals?
element_networks_info %>%
filter(element == "C") %>%
pull(n_elements)## [1] 50
# With how many elements, not including oxygen itself, does oxygen form minerals?
element_networks_info %>%
filter(element == "O") %>%
pull(n_elements)## [1] 66
# With how many elements, not including arsenic itself, does arsenic form minerals?
element_networks_info %>%
filter(element == "As") %>%
pull(n_elements)## [1] 59
# With how many elements, not including scandium itself, does scandium form minerals?
element_networks_info %>%
filter(element == "Sc") %>%
pull(n_elements)## [1] 15
The following calculations ask about the Sc, Ga, Br, Yb network at two time ranges.
elements <- c("Sc", "Ga", "Br", "Yb")
# How many minerals are in the network >= 1.7 Ga? (ignore the "NULL" that gets printed here!)
dragon::initialize_network(elements_of_interest = elements,
age_range = c(5, 1.7)) -> network_1.7
## NULL
network_1.7$edges %>%
select(mineral_name) %>%
distinct() %>%
tally()
## # A tibble: 1 × 1
## n
## <int>
## 1 12
# How many minerals are in the network at present? (ignore the "NULL" that gets printed here!)
dragon::initialize_network(elements_of_interest = elements,
age_range = c(5, 0)) -> network_presemt
## NULL
network_presemt$edges %>%
select(mineral_name) %>%
distinct() %>%
tally()
## # A tibble: 1 × 1
## n
## <int>
## 1 43
First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log y and untransformed x, whose diagnostics best meet assumptions.
# regression
# No
#lm(n_minerals ~ n_elements, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)
# Yes
lm(log(n_minerals) ~ n_elements, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)# No
#lm(n_minerals ~ log(n_elements), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No
#lm(log(n_minerals) ~ log(n_elements), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS A POSITIVE RELATIONSHIP.
get_model_info(model_ylog, "n_elements") -> model_info
element_networks_info %>%
mutate(n_minerals_log = log(n_minerals)) %>%
full_join(
tibble(
element = c("C", "H", "O", "N", "P"), # "S" is manually added since overlaps with geom_smooth
label_color = green
)
) -> plot_data
make_plot(
plot_data,
n_elements,
n_minerals_log,
model_info$formula,
"Number of Elements",
"Log Number of Minerals") %>%
add_element_labels() +
# manually add in S
geom_text(
aes(label = ifelse(element == "S", "S", "")),
nudge_y = -0.2,
nudge_x = -0.8,
fontface = "bold",
color = green,
size = label.size
) -> model1_plot
model1_plotThe P-value for the above model is P=9.82194154781477e-35.
First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log y and untransformed x, whose diagnostics best meet assumptions.
# regression
# No
#lm(n_localities ~ n_elements, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)
# Yes
lm(log(n_localities) ~ n_elements, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)# No
#lm(n_localities ~ log(n_elements), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No
#lm(log(n_localities) ~ log(n_elements), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS A POSITIVE RELATIONSHIP.
get_model_info(model_ylog, "n_elements") -> model_info
element_networks_info %>%
mutate(n_localities_log = log(n_localities)) %>%
full_join(
tibble(
element = c("C", "H", "O", "N", "P", "Au"), # all green except Au is black, and "S" has to be manually added
label_color = c(rep(green, 5), "black")
)
) -> plot_data
make_plot(
plot_data,
n_elements,
n_localities_log,
model_info$formula,
"Number of Elements",
"Log Number of Localities") %>%
add_element_labels(repel_seed = 1) +
# manually add in S
geom_text(
aes(label = ifelse(element == "S", "S", "")),
nudge_y = 0.1,
nudge_x = -0.7,
fontface = "bold",
color = green,
size = label.size
) -> model2_plot
model2_plotThe P-value for the above model is P=1.80797100886453e-21.
Note that there are six elements which do not appear in this analysis because they are missing crust data - C, H, N, REE (rare earth elements), Rh, Te.
First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log x and untransformed y, whose diagnostics best meet assumptions.
# regression
# No
#lm(n_elements ~ element_crust_percent_weight, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)
# No
#lm(log(n_elements) ~ element_crust_percent_weight, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)# Yes
lm(n_elements ~ log(element_crust_percent_weight), data = element_networks_info) -> model_xlog
performance::check_model(model_xlog)# No
#lm(log(n_elements) ~ log(element_crust_percent_weight), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS A POSITIVE RELATIONSHIP.
# Since we have a logged x, have to remake model:
element_networks_info %>%
mutate(element_crust_percent_weight_log = log(element_crust_percent_weight)) -> forfit
lm(n_elements ~ element_crust_percent_weight_log, data = forfit) -> model_xlog
get_model_info(model_xlog, "element_crust_percent_weight_log") -> model_info
element_networks_info %>%
mutate(element_crust_percent_weight_log = log(element_crust_percent_weight)) %>%
full_join(
tibble(
element = c(c("O", "P", "S"),
c("As", "Pb", "Sc", "Ga", "Yb", "Br", "U"),
c("Cu", "Fe", "Mn", "Zn", "Mo", "Co")), # manually "Ni"
label_color = c(rep(green, 3),
rep("black", 7),
rep(blue,6)) # Ni is manual
)
) -> plot_data
make_plot(
plot_data,
element_crust_percent_weight_log,
n_elements,
model_info$formula,
"Log crust percentage",
"Number of Elements") %>%
add_element_labels(repel_seed = 4) +
# manually add in Ni
geom_text(
aes(label = ifelse(element == "Ni", "Ni", "")),
nudge_y = -1.5,
nudge_x = 0.3,
fontface = "bold",
color = blue,
size = label.size
) -> model3_plot
model3_plotThe P-value for the above model is P=1.80281699716851e-12.
First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with both untransformed y and x, whose diagnostics best meet assumptions.
# Yes
lm(pauling ~ n_elements, data = element_networks_info) -> model_plain
performance::check_model(model_plain)# No
#lm(log(pauling) ~ n_elements, data = element_networks_info) -> model_ylog
#performance::check_model(model_ylog)
# No
#lm(pauling ~ log(n_elements), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No
#lm(log(pauling) ~ log(n_elements), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS NO RELATIONSHIP.:
get_model_info(model_plain, "n_elements") -> model_info
make_plot(
element_networks_info,
n_elements,
pauling,
model_info$formula,
"Number of Elements",
"Pauling electronegativity"
) -> model4_plot
model4_plotFirst, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log y and untransformed x, whose diagnostics best meet assumptions.
# No
#lm(n_minerals ~ pauling, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)
# Yes
lm(log(n_minerals) ~ pauling, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)# No
#lm(n_minerals ~ log(pauling), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No but close
#lm(log(pauling) ~ log(n_minerals), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS NO RELATIONSHIP.
get_model_info(model_ylog, "pauling") -> model_info
make_plot(
element_networks_info %>% mutate(n_minerals_log = log(n_minerals)),
pauling,
n_minerals_log,
model_info$formula,
"Pauling electronegativity",
"Log number of minerals formed"
) -> model5_plot
model5_plotFirst, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with both axes untransformed; all diagnostics are about the same, so we’ll use the regular data.
# Yes
lm(n_elements ~ number_of_protons, data = element_networks_info) -> model_plain
performance::check_model(model_plain)# No
#lm(log(n_elements) ~ number_of_protons, data = element_networks_info) -> model_ylog
#performance::check_model(model_ylog)
# No
#lm(n_elements ~ log(number_of_protons), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No
#lm(log(n_elements) ~ log(number_of_protons), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS A NEGATIVE RELATIONSHIP.
get_model_info(model_plain, "number_of_protons") -> model_info
element_networks_info %>%
full_join(
tibble(
element = c(c("C", "H", "O", "P", "S", "N"),
c("As", "Pb", "Sc", "Ga", "Yb", "Br", "U", "Bi"),
c("Cu", "Fe", "Mn", "Zn", "Mo", "Co", "Ni")),
label_color = c(rep("darkgreen", 6),
rep("black", 8),
rep("dodgerblue",7))
)
) -> plot_data
make_plot(
plot_data,
number_of_protons,
n_elements,
model_info$formula,
"Atomic number", # variable is number_of_protons which is equivalent
"Number of Elements"
) %>%
add_element_labels() -> model6_plot
model6_plotThe P-value for the above model is P=5.50001479811203e-05.
Here we export figures to PDF files for use in manuscript.
Figure 1 is two panels comprised of model1_plot and model2_plot.
fig1_grid <- plot_grid(model1_plot, model2_plot, labels = "AUTO", nrow = 2)
save_plot(ms_fig1_file, fig1_grid, base_height = 10, base_width = 7)Figure 2 is two panels comprised of model3_plot and model6_plot.
fig2_grid <- plot_grid(model3_plot, model6_plot, labels = "AUTO", nrow = 2)
save_plot(ms_fig2_file, fig2_grid, base_height = 10, base_width = 7)The following shows the R version and package versions loaded for this analysis to enable reproducibility.
sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.22 cowplot_1.1.1 ggrepel_0.9.1 ggtext_0.1.1
## [5] performance_0.9.0 broom_0.7.12 dragon_1.2.1 forcats_0.5.1
## [9] stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4 readr_2.1.2
## [13] tidyr_1.2.0 tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ellipsis_0.3.2 rprojroot_2.0.3
## [4] markdown_1.1 fs_1.5.2 gridtext_0.1.4
## [7] rstudioapi_0.13 roxygen2_7.1.2 farver_2.1.0
## [10] bit64_4.0.5 golem_0.3.2 fansi_1.0.3
## [13] lubridate_1.8.0 xml2_1.3.3 splines_4.1.2
## [16] knitr_1.38 config_0.3.1 pkgload_1.2.4
## [19] jsonlite_1.8.0 dbplyr_2.1.1 shinydashboard_0.7.2
## [22] shiny_1.7.1 compiler_4.1.2 httr_1.4.2
## [25] backports_1.4.1 assertthat_0.2.1 Matrix_1.4-1
## [28] fastmap_1.1.0 cli_3.2.0 later_1.3.0
## [31] htmltools_0.5.2 tools_4.1.2 igraph_1.3.0
## [34] gtable_0.3.0 glue_1.6.2 Rcpp_1.0.8.3
## [37] cellranger_1.1.0 jquerylib_0.1.4 vctrs_0.4.0
## [40] nlme_3.1-157 crosstalk_1.2.0 insight_0.17.0
## [43] xfun_0.30 brio_1.1.3 testthat_3.1.3
## [46] rvest_1.0.2 mime_0.12 lifecycle_1.0.1
## [49] scales_1.1.1 vroom_1.5.7 ragg_1.2.2
## [52] hms_1.1.1 promises_1.2.0.1 parallel_4.1.2
## [55] yaml_2.3.5 see_0.7.0 sass_0.4.1
## [58] stringi_1.7.6 highr_0.9 bayestestR_0.11.5
## [61] desc_1.4.1 attempt_0.3.1 rlang_1.0.2
## [64] pkgconfig_2.0.3 systemfonts_1.0.4 evaluate_0.15
## [67] lattice_0.20-45 patchwork_1.1.1 htmlwidgets_1.5.4
## [70] labeling_0.4.2 bit_4.0.4 tidyselect_1.1.2
## [73] magrittr_2.0.3 R6_2.5.1 generics_0.1.2
## [76] DBI_1.1.2 pillar_1.7.0 haven_2.4.3
## [79] withr_2.5.0 mgcv_1.8-40 datawizard_0.4.0
## [82] modelr_0.1.8 crayon_1.5.1 shinyWidgets_0.6.4
## [85] utf8_1.2.2 tzdb_0.3.0 rmarkdown_2.13
## [88] usethis_2.1.5 grid_4.1.2 readxl_1.4.0
## [91] reprex_2.0.1 digest_0.6.29 xtable_1.8-4
## [94] httpuv_1.6.5 textshaping_0.3.6 munsell_0.5.0
## [97] bslib_0.3.1